.386                   ;target the 80386 instruction set (32-bit registers and addressing)
        .xmm                    ;also accept SSE (XMM register) instructions
        .model flat,c           ;flat memory model; use C naming convention (stdcall is default)

;       include C libraries
        includelib      msvcrtd
        includelib      oldnames
        includelib      legacy_stdio_definitions.lib    ;for scanf, printf, ...

		;declarations for the external helpers this file invokes
		;(presumably asm_memset, asm_memmove, print_array, computeTermN, ... - verify in the include)
		include asm_procs.inc
		;======================================================

        .data                   ;initialized data
		;PREC_CUR: precision value main passes to computeTermN; main runs computePiArray with PREC_CUR + 2 digits
		PREC_CUR = 5
		;MAX_LEN: element count of the digit arrays declared below (new_rn holds twice as many)
		MAX_LEN  = 12
		
		.data?					;uninitialized data (nothing is declared here)


    .stack  4096            ;stack (optional, linker will default)

;=========================================

	;=========================================

	.data
	;printf format: one decimal integer followed by CR LF (used by testMul)
	print_num_fmt byte "%d", 0dh, 0ah, 0
	
	;additional printf formats; not referenced by the code visible in this file
	print_num_fmt1 byte "%d: ",  0
	print_num_fmt2 byte "	%d",  0
	print_num_fmt3 byte "%d", 0
	print_num_cr byte 0dh, 0ah, 0

	;working arrays of computePiArray; one decimal digit per dword, most significant digit first
	div_result dword MAX_LEN dup(0) ;int div_result[MAX_LEN] - digits of n / (2n + 1)
	rn dword MAX_LEN dup(0) ;int rn[MAX_LEN] - current series term
	sum dword MAX_LEN dup(0) ;int sum[MAX_LEN] - running sum of the terms
	new_rn dword (MAX_LEN*2) dup(0) ;int new_rn[MAX_LEN * 2] - product rn * div_result

    .code                   ;code

;-----------------------------------------------------------------------
;void big_div_array(int fenzi, int fenmu, int* result, unsigned int prec)
;
; Long division of fenzi (numerator) by fenmu (denominator), one decimal
; digit per step:
;	for (i = 0; i < prec; i++) {
;		result[i] = fenzi / fenmu;
;		fenzi = 10 * (fenzi % fenmu);
;	}
;
; In:    fenzi, fenmu - treated as unsigned 32-bit values; fenmu must be
;                       non-zero (a zero divisor raises #DE) and small
;                       enough that 10 * (fenmu - 1) fits in 32 bits
;        result       - receives prec dwords; result[0] is the integer
;                       quotient, result[1..] are the fractional digits
;        prec         - number of elements to produce; 0 writes nothing
; Out:   result[0 .. prec-1]
; Clobb: flags only (every register used is saved by USES)
;-----------------------------------------------------------------------
big_div_array proc uses eax ebx ecx edx edi,
	fenzi:dword, fenmu:dword, result: ptr dword, prec: dword

	cmp prec, 0
	je bda_done				;nothing requested: leave result untouched

	mov edi, result			;edi = result
	mov ebx, fenmu			;ebx = divisor (full 32 bits)
	mov eax, fenzi			;eax = running numerator
	xor ecx, ecx			;ecx = i

bda_digit:
	;32-bit divide: an 8-bit "div bl" would truncate fenmu and fault
	;as soon as a quotient does not fit in AL
	xor edx, edx			;edx:eax = numerator
	div ebx					;eax = numerator / fenmu, edx = numerator % fenmu
	mov [edi + ecx * 4], eax ;store the whole dword so no stale upper bytes survive

	lea eax, [edx + edx * 4] ;eax = remainder * 5
	add eax, eax			;eax = remainder * 10 = next numerator

	inc ecx
	cmp ecx, prec
	jb bda_digit

bda_done:
	ret
big_div_array endp


;-----------------------------------------------------------------------
;bigdata multiply: cr = a * b
;void IntMultiply(int a[], int b[], int cr[], int ma, int nb)
;
; Schoolbook multiplication of two digit arrays (one decimal digit per
; dword, most significant digit first).
;
; In:    a  - ma digits
;        b  - nb digits
;        cr - room for ma + nb dwords; must not overlap a or b
; Out:   cr - digits of the product. The partial products are first
;             accumulated one slot to the right (cr[i + j + 1]) so that
;             cr[0] can absorb the final carry, then the array is moved
;             one slot to the left again; that top carry digit is
;             discarded by the move and the last slot keeps its old value.
;             If ma or nb is 0, cr is only zero-filled.
; Clobb: flags only (every register used is saved by USES)
;
; All arithmetic is done on full dwords: a column sum can reach
; min(ma, nb) * 81, which no longer fits in a byte for 4 or more digits.
;-----------------------------------------------------------------------
IntMultiply proc uses eax ebx ecx edx esi edi,
	a: ptr dword, b: ptr dword, cr: ptr dword, ma: dword, nb: dword

	;memset(cr, 0, (ma + nb) * sizeof(cr[0]));
	mov ecx, ma
	add ecx, nb
	shl ecx, 2
	invoke asm_memset, cr, 0, ecx

	;an empty operand contributes nothing; the loops below assume ma, nb >= 1
	cmp ma, 0
	je im_done
	cmp nb, 0
	je im_done

	;for (i = 0; i < ma; ++i)
	;	for (j = 0; j < nb; ++j)
	;		cr[i + j + 1] += a[i] * b[j];
	mov esi, a				;esi = a
	mov edx, b				;edx = b
	xor ebx, ebx			;ebx = i

im_row:
	mov edi, cr
	lea edi, [edi + ebx * 4 + 4] ;edi = &cr[i + 1], so [edi + j*4] is cr[i + j + 1]
	xor ecx, ecx			;ecx = j

im_col:
	mov eax, [esi + ebx * 4] ;eax = a[i]
	imul eax, dword ptr [edx + ecx * 4] ;eax = a[i] * b[j]; two-operand imul leaves edx (= b) intact
	add [edi + ecx * 4], eax

	inc ecx
	cmp ecx, nb
	jb im_col

	inc ebx
	cmp ebx, ma
	jb im_row

	;for (i = ma + nb - 1; i > 0; --i) {
	;	if (c[i] > 9) {
	;		c[i - 1] = c[i - 1] + c[i] / 10;
	;		c[i] = c[i] % 10;
	;	}
	;}
	mov edi, cr				;edi = c
	mov ebx, 10
	mov ecx, ma
	add ecx, nb
	dec ecx					;ecx = i = ma + nb - 1, at least 1 here

im_carry:
	mov eax, [edi + ecx * 4] ;eax = c[i]
	cmp eax, 9
	jbe im_carry_next
	xor edx, edx			;b pointer is no longer needed; edx:eax = c[i]
	div ebx					;eax = c[i] / 10, edx = c[i] % 10
	mov [edi + ecx * 4], edx
	add [edi + ecx * 4 - 4], eax ;c[i - 1] += carry
im_carry_next:
	dec ecx
	jnz im_carry			;stop before i == 0: c[0] only receives carries

	;memmove(c, c + 1, (ma + nb - 1) * sizeof(int));
	mov edi, cr
	lea esi, [edi + 4]		;esi = c + 1
	mov ecx, ma
	add ecx, nb
	dec ecx
	shl ecx, 2				;dwords to bytes
	invoke asm_memmove, edi, esi, ecx

im_done:
	ret
IntMultiply endp

.data
	;fixture for testMul: one decimal digit per dword, most significant first
	TESTMUL_LEN = 7						;digits per operand, and number of result digits printed
	a_ary dword  0,6,6,6,6,7,0			;0.666670
	b_ary dword  0,4,0,0,0,0,0			;0.400000
	res_ary dword (2 * TESTMUL_LEN) dup(0) ;IntMultiply needs ma + nb result slots

.code
;-----------------------------------------------------------------------
;void testMul(void)
;
; Multiplies a_ary by b_ary with IntMultiply and prints the first
; TESTMUL_LEN result digits, one per line, using print_num_fmt.
; With the fixture above (0.666670 * 0.400000) the product digits are
; 0 2 6 6 6 6 8.
;
; Clobb: eax, ecx, edx (volatile under the C convention); ebx and esi are
;        used because they must survive the printf calls and are
;        therefore saved and restored by USES.
;-----------------------------------------------------------------------
testMul proc uses ebx esi
	invoke IntMultiply, addr a_ary, addr b_ary, addr res_ary, TESTMUL_LEN, TESTMUL_LEN

	xor ebx, ebx						;ebx = index of the digit to print
	mov esi, offset res_ary
tm_print:
	invoke printf, addr print_num_fmt, dword ptr [esi + ebx * 4]
	inc ebx
	cmp ebx, TESTMUL_LEN
	jb tm_print

	ret
testMul endp

;-----------------------------------------------------------------------
;//c = a + b
;void bigdata_add(int* a, int* b, int* c, int len)
;
; Adds two digit arrays of equal length (one decimal digit per dword,
; most significant digit first).
;
; In:    a, b - len digits each
;        cr   - receives len digits; may be the same array as a or b
;        len  - number of digits; 0 does nothing
; Out:   cr[i] = digit i of (a + b). A carry out of cr[0] is discarded,
;        so nothing is ever written in front of the array.
; Clobb: flags only (every register used is saved by USES)
;
; The digits are walked from the least significant end with a running
; carry, so a carry can ripple through any number of 9s in one pass and
; the stack is never touched inside the loop.
;-----------------------------------------------------------------------
bigdata_add proc uses eax ebx ecx esi edi edx,
	a:ptr dword, b: ptr dword, cr: ptr dword, len: dword

	mov esi, a				;esi = a
	mov edi, cr				;edi = c
	mov ebx, 10				;ebx = number base
	mov ecx, len			;ecx = i + 1 for the next digit to process
	xor eax, eax			;eax = carry into the least significant digit
	jmp ba_step

ba_digit:
	;val = a[i] + b[i] + carry
	mov edx, b				;reloaded each pass because div needs edx
	add eax, [esi + ecx * 4]
	add eax, [edx + ecx * 4]
	;c[i] = val % 10; carry = val / 10
	xor edx, edx
	div ebx					;eax = carry for digit i - 1, edx = digit value
	mov [edi + ecx * 4], edx ;full dword store: no stale upper bytes in c[i]

ba_step:
	sub ecx, 1				;i = i - 1; borrows (CF = 1) once index 0 is done
	jnc ba_digit

	ret
bigdata_add endp

;-----------------------------------------------------------------------
;void carry_ary(int* c, int len)
;
; Rounds a digit array (one decimal digit per dword, most significant
; digit first) on its last digit:
;	if (c[len - 1] >= 5) {
;		c[len - 1] = 0;
;		add 1 to c[len - 2] and let the carry ripple towards c[0]
;	}
; e.g. 3.121497 becomes 3.121500. A last digit below 5 is left as it is.
;
; In:    cr  - len digits
;        len - number of digits; 0 does nothing
; Out:   cr rounded in place. A carry out of c[0] is discarded, so
;        nothing in front of the array is ever read or written.
; Clobb: flags only (every register used is saved by USES)
;-----------------------------------------------------------------------
carry_ary proc uses eax ebx ecx edx edi,
	cr: ptr dword, len: dword

	mov edi, cr				;edi = c
	mov ecx, len
	test ecx, ecx
	jz ca_done				;empty array: nothing to round

	;if (c[len - 1] >= 5)
	dec ecx					;ecx = i = len - 1
	cmp dword ptr [edi + ecx * 4], 5
	jb ca_done

	mov dword ptr [edi + ecx * 4], 0 ;c[i] = 0
	mov ebx, 10

ca_ripple:
	test ecx, ecx
	jz ca_done				;already at c[0]: no digit left to carry into
	;i--; c[i]++;
	dec ecx
	inc dword ptr [edi + ecx * 4]
	mov eax, [edi + ecx * 4]
	cmp eax, ebx
	jb ca_done				;c[i] < 10: carry absorbed

	;c[i] = c[i] % 10, then carry into the next more significant digit
	xor edx, edx
	div ebx					;edx = c[i] % 10
	mov [edi + ecx * 4], edx
	jmp ca_ripple

ca_done:
	ret
carry_ary endp

;-----------------------------------------------------------------------
;void computePiArray(unsigned int prec, unsigned int termN)
;
; Accumulates a series in the digit arrays rn / sum, both starting at 2:
;	for (n = 1; n < termN; n++) {
;		big_div_array(n, 2 * n + 1, div_result, prec);
;		IntMultiply(rn, div_result, new_rn, prec, prec);  //new_rn = rn * n/(2n+1)
;		bigdata_add(sum, new_rn, sum, prec);
;		carry_ary(new_rn, prec);
;		print_array(n, new_rn, sum, prec);
;		memcpy(rn, new_rn, MAX_LEN * sizeof(int));
;	}
;
; In:    prec  - digits to work with, 1 .. MAX_LEN. Any other value makes
;                the routine return without doing anything, because the
;                static buffers hold MAX_LEN digits (new_rn: 2 * MAX_LEN).
;        termN - loop bound; values of 0 or 1 run no iteration
; Out:   rn, sum, div_result and new_rn are updated; print_array is
;        called once per term
; Clobb: eax, edx, flags (ecx and esi are saved by USES)
;
; rn and sum are only seeded in element 0; the remaining elements are
; expected to still hold their initial zeros.
;-----------------------------------------------------------------------
computePiArray proc uses ecx esi,
	prec: dword, termN: dword
	local n: dword			;term index; kept on the stack because print_array
							;and memcpy may clobber the volatile registers

	;reject prec == 0 (wraps to 0FFFFFFFFh) and prec > MAX_LEN in one compare
	mov eax, prec
	dec eax
	cmp eax, MAX_LEN
	jae cpa_done

	;rn[0] = 2;
	;sum[0] = 2;
	mov dword ptr rn[0], 2
	mov dword ptr sum[0], 2

	;for (n = 1; n < termN; n++) {
	mov n, 1
cpa_term:
	mov eax, n
	cmp eax, termN
	jae cpa_done

	;big_div_array(n, 2 * n + 1, div_result, prec);
	lea eax, [eax + eax + 1] ;eax = 2 * n + 1
	invoke big_div_array, n, eax, addr div_result, prec

	;new_rn = rn * n/(2n+1)
	;IntMultiply(rn, div_result, new_rn, prec, prec)
	invoke IntMultiply, addr rn, addr div_result, addr new_rn, prec, prec
	;bigdata_add(sum, new_rn, sum, prec);
	invoke bigdata_add, addr sum, addr new_rn, addr sum, prec
	;carry_ary(new_rn, prec);
	invoke carry_ary, addr new_rn, prec
	;print_array(n, new_rn, sum, prec);
	invoke print_array, n, addr new_rn, addr sum, prec

	;rn = new value for next loop
	;memcpy(rn, new_rn, MAX_LEN * sizeof(int));
	invoke memcpy, addr rn, addr new_rn, MAX_LEN * 4

	inc n
	jmp cpa_term

cpa_done:
	ret
computePiArray endp

;=========================================
;int main(void)
;
; Program entry point: asks computeTermN for the loop bound that goes
; with PREC_CUR, runs computePiArray with PREC_CUR + 2 digits and that
; bound, then returns 0.
;=========================================

PI_WORK_DIGITS = PREC_CUR + 2           ;digit count handed to computePiArray

        public  main

main    proc
        ;call testMul

        ;eax = computeTermN(PREC_CUR)
        invoke  computeTermN, PREC_CUR
        ;computePiArray(PI_WORK_DIGITS, eax)
        invoke  computePiArray, PI_WORK_DIGITS, eax

        xor     eax, eax                ;exit code 0
        ret
main    endp

        end
